Regression Models Course Project

Helper functions: PRESS, predicted R-squared, a one-row summary of model fit statistics, and the p-value of a model's overall F-test

#' Prediction sum of squares (PRESS) of a fitted linear model
#'
#' @param linear.model A fitted model accepted by `residuals()` and
#'   `lm.influence()` (e.g. the result of `lm()`).
#' @return A single number: the sum of squared predictive residuals.
PRESS <- function(linear.model) {
  # Leverage of every observation (diagonal of the hat matrix)
  leverage <- lm.influence(linear.model)$hat
  # Predictive residual: ordinary residual scaled by 1 / (1 - leverage)
  predictive.resid <- residuals(linear.model) / (1 - leverage)

  sum(predictive.resid^2)
}
#' Predicted R-squared of a fitted linear model
#'
#' @param linear.model A fitted model accepted by `anova()` and `PRESS()`.
#' @return A single number: 1 - PRESS / (total sum of squares).
pred_r_squared <- function(linear.model) {
  # Adding every "Sum Sq" entry of the ANOVA table (model terms plus
  # residuals) gives the total sum of squares
  sum.sq.rows <- anova(linear.model)[["Sum Sq"]]
  total.ss <- sum(sum.sq.rows)

  1 - PRESS(linear.model) / total.ss
}
#' Collect the main goodness-of-fit statistics of a linear model
#'
#' @param linear.model A fitted model accepted by `summary()`,
#'   `pred_r_squared()` and `PRESS()`.
#' @return A one-row data frame with columns `r.squared`, `adj.r.squared`,
#'   `pred.r.squared` and `press`.
model_fit_stats <- function(linear.model) {
  fit.summary <- summary(linear.model)

  data.frame(
    r.squared = fit.summary$r.squared,
    adj.r.squared = fit.summary$adj.r.squared,
    pred.r.squared = pred_r_squared(linear.model),
    press = PRESS(linear.model)
  )
}
#' P-value of the overall F-test of a linear model
#'
#' @param modelobject A fitted model inheriting from class "lm".
#' @return A bare numeric scalar (no attributes): the upper-tail probability
#'   of the F statistic reported by `summary(modelobject)`.
#' @details Stops with an error when `modelobject` does not inherit from
#'   "lm", or when its summary carries no F statistic.
lmp <- function(modelobject) {
  # inherits() copes with objects that have several classes; comparing
  # class() to a string gives a vector condition, which `if` rejects.
  if (!inherits(modelobject, "lm")) {
    stop("Not an object of class 'lm' ")
  }
  # fstatistic holds the F value followed by its numerator and denominator
  # degrees of freedom
  f <- summary(modelobject)$fstatistic
  if (is.null(f)) {
    stop("The model summary has no F statistic (e.g. intercept-only model)")
  }
  p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  # pf() carries over the name of f[1]; return a plain number
  attributes(p) <- NULL
  p
}

Data and library load, integrity check, and summary

# datasets ships the mtcars data frame analysed below
library(datasets)
# ggplot2 provides qplot() and geom_abline(), used for the figures
library(ggplot2)
# NOTE(review): no plyr function is called in the visible code - confirm it is still needed
library(plyr)
data(mtcars)
# Integrity check: total count of missing values over all columns
sum(is.na(mtcars))
## [1] 0
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

Explore the data

qplot(mpg, geom="histogram",data=mtcars,binwidth=0.5)

qplot(wt, geom="histogram",data=mtcars,binwidth=0.5)

qplot(cyl, geom="histogram",data=mtcars,binwidth=0.5)

# disp spans 71.1-472, so 0.5-wide bins would leave almost every bin empty;
# use a bin width matched to the variable's scale
qplot(disp, geom = "histogram", data = mtcars, binwidth = 25)

# hp spans 52-335, so 0.5-wide bins would leave almost every bin empty;
# use a bin width matched to the variable's scale
qplot(hp, geom = "histogram", data = mtcars, binwidth = 20)

qplot(carb, geom="histogram",data=mtcars,binwidth=0.5)

qplot(qsec, geom="histogram",data=mtcars,binwidth=0.5)

qplot(gear, geom="histogram",data=mtcars,binwidth=0.5)

qplot(am, geom="histogram",data=mtcars,binwidth=0.5)

qplot(vs, geom="histogram",data=mtcars,binwidth=0.5)

qplot(drat, geom="histogram",data=mtcars,binwidth=0.5)

wt vs mpg

qplot(wt,mpg,data=mtcars,geom=c("point","smooth"),method="loess")

disp vs mpg

qplot(disp,mpg,data=mtcars,geom=c("point","smooth"),method="loess")

hp vs mpg

qplot(hp,mpg,data=mtcars,geom=c("point","smooth"),method="loess")

qsec vs mpg

qplot(qsec,mpg,data=mtcars,geom=c("point","smooth"),method="loess")

drat vs mpg

qplot(drat,mpg,data=mtcars,geom=c("point","smooth"),method="loess")

Frequency tables of the discrete variables (am, cyl, vs, gear, carb)

table(mtcars$am)
## 
##  0  1 
## 19 13
table(mtcars$cyl)
## 
##  4  6  8 
## 11  7 14
table(mtcars$vs)
## 
##  0  1 
## 18 14
table(mtcars$gear)
## 
##  3  4  5 
## 15 12  5
table(mtcars$carb)
## 
##  1  2  3  4  6  8 
##  7 10  3 10  1  1

Look at mpg for (0) automatic and (1) manual transmissions

boxplot(mpg ~ factor(am), data = mtcars, xlab="transmission", ylab="mpg")

mean(subset(mtcars, am == 0)$mpg)
## [1] 17.14737
mean(subset(mtcars, am == 1)$mpg)
## [1] 24.39231

lm01 - Fit a model with mpg as the response and transmission (as a factor) as the only predictor

lm01 <- lm(mpg ~ factor(am), data = mtcars)
summary(lm01)
## 
## Call:
## lm(formula = mpg ~ factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3923 -3.0923 -0.2974  3.2439  9.5077 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   17.147      1.125  15.247 1.13e-15 ***
## factor(am)1    7.245      1.764   4.106 0.000285 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.902 on 30 degrees of freedom
## Multiple R-squared:  0.3598, Adjusted R-squared:  0.3385 
## F-statistic: 16.86 on 1 and 30 DF,  p-value: 0.000285
pred_r_squared(lm01)
## [1] 0.2626133
plot(lm01)

Correlations

sort(cor(mtcars)[1,]) # wt,cyl,disp,hp,carb,qsec,gear,am,vs,drat,mpg
##         wt        cyl       disp         hp       carb       qsec 
## -0.8676594 -0.8521620 -0.8475514 -0.7761684 -0.5509251  0.4186840 
##       gear         am         vs       drat        mpg 
##  0.4802848  0.5998324  0.6640389  0.6811719  1.0000000
cor(mtcars$cyl,mtcars$disp)
## [1] 0.9020329
cor(mtcars$wt,mtcars$disp)
## [1] 0.8879799
cor(mtcars$cyl,mtcars$hp)
## [1] 0.8324475
cor(mtcars$disp,mtcars$hp)
## [1] 0.7909486
cor(mtcars$wt,mtcars$cyl)
## [1] 0.7824958
cor(mtcars$wt,mtcars$hp)
## [1] 0.6587479

lm02 - Fit model with weight and transmission predicting mpg

lm02 <- lm(mpg ~ wt + factor(am), data = mtcars)
summary(lm02)
## 
## Call:
## lm(formula = mpg ~ wt + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5295 -2.3619 -0.1317  1.4025  6.8782 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.32155    3.05464  12.218 5.84e-13 ***
## wt          -5.35281    0.78824  -6.791 1.87e-07 ***
## factor(am)1 -0.02362    1.54565  -0.015    0.988    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.098 on 29 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7358 
## F-statistic: 44.17 on 2 and 29 DF,  p-value: 1.579e-09
pred_r_squared(lm02)
## [1] 0.6874984
library(car)
vif(lm02)
##         wt factor(am) 
##   1.921413   1.921413
plot(lm02)

# mpg against weight, coloured by transmission. am is stored as numeric 0/1,
# so it is converted to a factor to get two discrete colours rather than a
# continuous gradient.
q02 <- qplot(wt, mpg, data = mtcars, colour = factor(am))
# Overlay the lm02 fit at the baseline transmission level (am = 0); the
# factor(am)1 offset is not added to the intercept.
q02 <- q02 + geom_abline(
  intercept = coef(lm02)[["(Intercept)"]],
  slope = coef(lm02)[["wt"]],
  colour = "red"
)
q02

lm03 - Fit model with cylinders and transmission predicting mpg

lm03 <- lm(mpg ~ cyl + factor(am), data = mtcars)
summary(lm03)
## 
## Call:
## lm(formula = mpg ~ cyl + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6856 -1.7172 -0.2657  1.8838  6.8144 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  34.5224     2.6032  13.262 7.69e-14 ***
## cyl          -2.5010     0.3608  -6.931 1.28e-07 ***
## factor(am)1   2.5670     1.2914   1.988   0.0564 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.059 on 29 degrees of freedom
## Multiple R-squared:  0.759,  Adjusted R-squared:  0.7424 
## F-statistic: 45.67 on 2 and 29 DF,  p-value: 1.094e-09
pred_r_squared(lm03)
## [1] 0.7082706
library(car)
vif(lm03)
##        cyl factor(am) 
##   1.375739   1.375739
plot(lm03)

# mpg against number of cylinders, coloured by transmission. am is stored as
# numeric 0/1, so it is converted to a factor to get two discrete colours
# rather than a continuous gradient.
q03 <- qplot(cyl, mpg, data = mtcars, colour = factor(am))
# Overlay the lm03 fit at the baseline transmission level (am = 0); the
# factor(am)1 offset is not added to the intercept.
q03 <- q03 + geom_abline(
  intercept = coef(lm03)[["(Intercept)"]],
  slope = coef(lm03)[["cyl"]],
  colour = "red"
)
q03

lm04 - Fit model with disp and transmission predicting mpg

lm04 <- lm(mpg ~ disp + factor(am), data = mtcars)
summary(lm04)
## 
## Call:
## lm(formula = mpg ~ disp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6382 -2.4751 -0.5631  2.2333  6.8386 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.848081   1.834071  15.184 2.45e-15 ***
## disp        -0.036851   0.005782  -6.373 5.75e-07 ***
## factor(am)1  1.833458   1.436100   1.277    0.212    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.218 on 29 degrees of freedom
## Multiple R-squared:  0.7333, Adjusted R-squared:  0.7149 
## F-statistic: 39.87 on 2 and 29 DF,  p-value: 4.749e-09
pred_r_squared(lm04)
## [1] 0.6770371
library(car)
vif(lm04)
##       disp factor(am) 
##   1.537396   1.537396
plot(lm04)

# mpg against displacement, coloured by transmission. am is stored as numeric
# 0/1, so it is converted to a factor to get two discrete colours rather than
# a continuous gradient.
q04 <- qplot(disp, mpg, data = mtcars, colour = factor(am))
# Overlay the lm04 fit at the baseline transmission level (am = 0); the
# factor(am)1 offset is not added to the intercept.
q04 <- q04 + geom_abline(
  intercept = coef(lm04)[["(Intercept)"]],
  slope = coef(lm04)[["disp"]],
  colour = "red"
)
q04

lm05 - Fit model with wt, cyl, disp, hp and transmission predicting mpg

lm05 <- lm(mpg ~ wt + cyl + disp + hp + factor(am), data = mtcars)
summary(lm05)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + disp + hp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5952 -1.5864 -0.7157  1.2821  5.5725 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.20280    3.66910  10.412 9.08e-11 ***
## wt          -3.30262    1.13364  -2.913  0.00726 ** 
## cyl         -1.10638    0.67636  -1.636  0.11393    
## disp         0.01226    0.01171   1.047  0.30472    
## hp          -0.02796    0.01392  -2.008  0.05510 .  
## factor(am)1  1.55649    1.44054   1.080  0.28984    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.505 on 26 degrees of freedom
## Multiple R-squared:  0.8551, Adjusted R-squared:  0.8273 
## F-statistic:  30.7 on 5 and 26 DF,  p-value: 4.029e-10
pred_r_squared(lm05)
## [1] 0.7849818
library(car)
vif(lm05)
##         wt        cyl       disp         hp factor(am) 
##   6.079452   7.209456  10.401420   4.501859   2.553064
plot(lm05)

lm06 - Fit model with wt, hp and transmission predicting mpg

lm06 <- lm(mpg ~ wt + hp + factor(am), data = mtcars)
summary(lm06)
## 
## Call:
## lm(formula = mpg ~ wt + hp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4221 -1.7924 -0.3788  1.2249  5.5317 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.002875   2.642659  12.867 2.82e-13 ***
## wt          -2.878575   0.904971  -3.181 0.003574 ** 
## hp          -0.037479   0.009605  -3.902 0.000546 ***
## factor(am)1  2.083710   1.376420   1.514 0.141268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared:  0.8399, Adjusted R-squared:  0.8227 
## F-statistic: 48.96 on 3 and 28 DF,  p-value: 2.908e-11
pred_r_squared(lm06)
## [1] 0.7878597
library(car)
vif(lm06)
##         wt         hp factor(am) 
##   3.774838   2.088124   2.271082
plot(lm06)

lm07 - Fit model with wt, cyl and transmission predicting mpg

lm07 <- lm(mpg ~ wt + cyl + factor(am), data = mtcars)
summary(lm07)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1735 -1.5340 -0.5386  1.5864  6.0812 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.4179     2.6415  14.923 7.42e-15 ***
## wt           -3.1251     0.9109  -3.431  0.00189 ** 
## cyl          -1.5102     0.4223  -3.576  0.00129 ** 
## factor(am)1   0.1765     1.3045   0.135  0.89334    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.612 on 28 degrees of freedom
## Multiple R-squared:  0.8303, Adjusted R-squared:  0.8122 
## F-statistic: 45.68 on 3 and 28 DF,  p-value: 6.51e-11
pred_r_squared(lm07)
## [1] 0.7756775
library(car)
vif(lm07)
##         wt        cyl factor(am) 
##   3.609011   2.584066   1.924955
plot(lm07)

lm08 - Fit model with wt interacting with transmission predicting mpg

lm08 <- lm(mpg ~ wt * factor(am), data = mtcars)
summary(lm08)
## 
## Call:
## lm(formula = mpg ~ wt * factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6004 -1.5446 -0.5325  0.9012  6.0909 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     31.4161     3.0201  10.402 4.00e-11 ***
## wt              -3.7859     0.7856  -4.819 4.55e-05 ***
## factor(am)1     14.8784     4.2640   3.489  0.00162 ** 
## wt:factor(am)1  -5.2984     1.4447  -3.667  0.00102 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.591 on 28 degrees of freedom
## Multiple R-squared:  0.833,  Adjusted R-squared:  0.8151 
## F-statistic: 46.57 on 3 and 28 DF,  p-value: 5.209e-11
pred_r_squared(lm08)
## [1] 0.7857928
library(car)
vif(lm08)
##            wt    factor(am) wt:factor(am) 
##      2.728248     20.901259     15.366853
plot(lm08)

lm09 - Fit model with wt interacting with transmission + qsec predicting mpg

lm09 <- lm(mpg ~ wt * am + qsec, data=mtcars)
summary(lm09)
## 
## Call:
## lm(formula = mpg ~ wt * am + qsec, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5076 -1.3801 -0.5588  1.0630  4.3684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.723      5.899   1.648 0.110893    
## wt            -2.937      0.666  -4.409 0.000149 ***
## am            14.079      3.435   4.099 0.000341 ***
## qsec           1.017      0.252   4.035 0.000403 ***
## wt:am         -4.141      1.197  -3.460 0.001809 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.084 on 27 degrees of freedom
## Multiple R-squared:  0.8959, Adjusted R-squared:  0.8804 
## F-statistic: 58.06 on 4 and 27 DF,  p-value: 7.168e-13
pred_r_squared(lm09)
## [1] 0.8535095
library(car)
vif(lm09)
##        wt        am      qsec     wt:am 
##  3.030963 20.970925  1.447406 16.302453
plot(lm09)

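lm10 - Fit weight, transmission, and the weight-by-transmission interaction (this formula expands to the same model as lm08's `wt * factor(am)`, so the results are identical)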

lm10 <- lm(mpg ~ wt + factor(am) + wt * factor(am), data=mtcars)
summary(lm10)
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + wt * factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6004 -1.5446 -0.5325  0.9012  6.0909 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     31.4161     3.0201  10.402 4.00e-11 ***
## wt              -3.7859     0.7856  -4.819 4.55e-05 ***
## factor(am)1     14.8784     4.2640   3.489  0.00162 ** 
## wt:factor(am)1  -5.2984     1.4447  -3.667  0.00102 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.591 on 28 degrees of freedom
## Multiple R-squared:  0.833,  Adjusted R-squared:  0.8151 
## F-statistic: 46.57 on 3 and 28 DF,  p-value: 5.209e-11
pred_r_squared(lm10)
## [1] 0.7857928
library(car)
vif(lm10)
##            wt    factor(am) wt:factor(am) 
##      2.728248     20.901259     15.366853
plot(lm10)

lm11 - Fit weight, transmission, cyl

lm11 <- lm(mpg ~ wt + factor(am) + factor(cyl), data=mtcars)
summary(lm11)
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(cyl), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4898 -1.3116 -0.5039  1.4162  5.7758 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.7536     2.8135  11.997  2.5e-12 ***
## wt            -3.1496     0.9080  -3.469  0.00177 ** 
## factor(am)1    0.1501     1.3002   0.115  0.90895    
## factor(cyl)6  -4.2573     1.4112  -3.017  0.00551 ** 
## factor(cyl)8  -6.0791     1.6837  -3.611  0.00123 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared:  0.8375, Adjusted R-squared:  0.8134 
## F-statistic: 34.79 on 4 and 27 DF,  p-value: 2.73e-10
pred_r_squared(lm11)
## [1] 0.7765213
library(car)
vif(lm11)
##                 GVIF Df GVIF^(1/(2*Df))
## wt          3.611208  1        1.900318
## factor(am)  1.925620  1        1.387667
## factor(cyl) 2.585745  2        1.268079
plot(lm11)

lm12 - Fit weight and hp

lm12 <- lm(mpg ~ wt + hp, data=mtcars)
summary(lm12)
## 
## Call:
## lm(formula = mpg ~ wt + hp, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.941 -1.600 -0.182  1.050  5.854 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.22727    1.59879  23.285  < 2e-16 ***
## wt          -3.87783    0.63273  -6.129 1.12e-06 ***
## hp          -0.03177    0.00903  -3.519  0.00145 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.593 on 29 degrees of freedom
## Multiple R-squared:  0.8268, Adjusted R-squared:  0.8148 
## F-statistic: 69.21 on 2 and 29 DF,  p-value: 9.109e-12
pred_r_squared(lm12)
## [1] 0.7810871
library(car)
vif(lm12)
##       wt       hp 
## 1.766625 1.766625
plot(lm12)

lm13 - Fit weight and carb

lm13 <- lm(mpg ~ wt + carb, data=mtcars)
summary(lm13)
## 
## Call:
## lm(formula = mpg ~ wt + carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5206 -2.1223 -0.0467  1.4551  5.9736 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.7300     1.7602  21.435  < 2e-16 ***
## wt           -4.7646     0.5765  -8.265 4.12e-09 ***
## carb         -0.8215     0.3492  -2.353   0.0256 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.839 on 29 degrees of freedom
## Multiple R-squared:  0.7924, Adjusted R-squared:  0.7781 
## F-statistic: 55.36 on 2 and 29 DF,  p-value: 1.255e-10
pred_r_squared(lm13)
## [1] 0.7461679
library(car)
vif(lm13)
##       wt     carb 
## 1.223761 1.223761
plot(lm13)

lm14 - Fit weight and disp

lm14 <- lm(mpg ~ wt + disp, data=mtcars)
summary(lm14)
## 
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4087 -2.3243 -0.7683  1.7721  6.3484 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.96055    2.16454  16.151 4.91e-16 ***
## wt          -3.35082    1.16413  -2.878  0.00743 ** 
## disp        -0.01773    0.00919  -1.929  0.06362 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared:  0.7809, Adjusted R-squared:  0.7658 
## F-statistic: 51.69 on 2 and 29 DF,  p-value: 2.744e-10
pred_r_squared(lm14)
## [1] 0.725321
library(car)
vif(lm14)
##       wt     disp 
## 4.728319 4.728319
plot(lm14)

lm15 - Fit weight and factor(vs)

lm15 <- lm(mpg ~ wt + factor(vs), data=mtcars)
summary(lm15)
## 
## Call:
## lm(formula = mpg ~ wt + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7071 -2.4415 -0.3129  1.4319  6.0156 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  33.0042     2.3554  14.012 1.92e-14 ***
## wt           -4.4428     0.6134  -7.243 5.63e-08 ***
## factor(vs)1   3.1544     1.1907   2.649   0.0129 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.78 on 29 degrees of freedom
## Multiple R-squared:  0.801,  Adjusted R-squared:  0.7873 
## F-statistic: 58.36 on 2 and 29 DF,  p-value: 6.818e-11
pred_r_squared(lm15)
## [1] 0.7530933
library(car)
vif(lm15)
##         wt factor(vs) 
##   1.444943   1.444943
plot(lm15)

lm16 - Fit hp and factor(vs)

lm16 <- lm(mpg ~ hp + factor(vs), data=mtcars)
summary(lm16)
## 
## Call:
## lm(formula = mpg ~ hp + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7131 -2.3336 -0.1332  1.9055  7.9055 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 26.96300    2.89069   9.328 3.13e-10 ***
## hp          -0.05453    0.01448  -3.766 0.000752 ***
## factor(vs)1  2.57622    1.96966   1.308 0.201163    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.818 on 29 degrees of freedom
## Multiple R-squared:  0.6246, Adjusted R-squared:  0.5987 
## F-statistic: 24.12 on 2 and 29 DF,  p-value: 6.768e-07
pred_r_squared(lm16)
## [1] 0.5082001
library(car)
vif(lm16)
##         hp factor(vs) 
##    2.09586    2.09586
plot(lm16)

lm17 - Fit disp and factor(vs)

lm17 <- lm(mpg ~ disp + factor(vs), data=mtcars)
summary(lm17)
## 
## Call:
## lm(formula = mpg ~ disp + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4605 -2.0260 -0.6467  1.7285  7.0790 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.949282   2.201166  12.697 2.27e-13 ***
## disp        -0.036896   0.006715  -5.494 6.43e-06 ***
## factor(vs)1  1.495004   1.651290   0.905    0.373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.261 on 29 degrees of freedom
## Multiple R-squared:  0.7261, Adjusted R-squared:  0.7072 
## F-statistic: 38.44 on 2 and 29 DF,  p-value: 7.005e-09
pred_r_squared(lm17)
## [1] 0.66982
library(car)
vif(lm17)
##       disp factor(vs) 
##   2.018941   2.018941
plot(lm17)

Results of stepwise selection using MASS stepAIC (note: the starting model omits am and vs, so transmission is never a candidate predictor)

library(MASS)
fit<- lm(mpg~cyl+disp+hp+drat+wt+qsec+gear+carb,data=mtcars)
step <- stepAIC(fit, direction="both")
## Start:  AIC=69.12
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - carb  1    0.6725 158.80 67.260
## - cyl   1    1.2886 159.41 67.384
## - drat  1    3.2639 161.39 67.778
## - disp  1    3.7211 161.84 67.869
## - qsec  1    5.2229 163.35 68.164
## - gear  1    5.3164 163.44 68.183
## - hp    1    6.9592 165.08 68.503
## <none>              158.12 69.125
## - wt    1   30.4892 188.61 72.767
## 
## Step:  AIC=67.26
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - cyl   1     2.055 160.85 65.672
## - drat  1     2.810 161.61 65.822
## - gear  1     4.681 163.48 66.190
## - qsec  1     7.390 166.19 66.716
## <none>              158.80 67.260
## - disp  1    11.211 170.01 67.443
## - hp    1    12.981 171.78 67.775
## + carb  1     0.673 158.12 69.125
## - wt    1    76.291 235.09 77.815
## 
## Step:  AIC=65.67
## mpg ~ disp + hp + drat + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - drat  1     4.936 165.79 64.639
## - gear  1     9.278 170.13 65.466
## - disp  1     9.846 170.70 65.573
## <none>              160.85 65.672
## - qsec  1    17.833 178.69 67.036
## - hp    1    18.419 179.27 67.141
## + cyl   1     2.055 158.80 67.260
## + carb  1     1.439 159.41 67.384
## - wt    1    84.464 245.32 77.178
## 
## Step:  AIC=64.64
## mpg ~ disp + hp + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - disp  1     8.692 174.48 64.274
## <none>              165.79 64.639
## + drat  1     4.936 160.85 65.672
## + cyl   1     4.182 161.61 65.822
## - qsec  1    17.694 183.48 65.884
## - gear  1    19.848 185.63 66.258
## - hp    1    20.149 185.94 66.310
## + carb  1     0.890 164.90 66.467
## - wt    1    91.501 257.29 76.703
## 
## Step:  AIC=64.27
## mpg ~ hp + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - qsec  1    11.057 185.54 64.241
## <none>              174.48 64.274
## - gear  1    11.579 186.06 64.331
## + disp  1     8.692 165.79 64.639
## - hp    1    13.505 187.99 64.660
## + carb  1     6.845 167.63 64.994
## + drat  1     3.783 170.70 65.573
## + cyl   1     1.973 172.51 65.911
## - wt    1   105.091 279.57 77.361
## 
## Step:  AIC=64.24
## mpg ~ hp + wt + gear
## 
##        Df Sum of Sq    RSS    AIC
## - gear  1     9.510 195.05 63.840
## <none>              185.54 64.241
## + qsec  1    11.057 174.48 64.274
## + carb  1    10.850 174.69 64.312
## + cyl   1     9.773 175.76 64.509
## + drat  1     4.176 181.36 65.512
## + disp  1     2.056 183.48 65.884
## - hp    1    91.647 277.19 75.086
## - wt    1    94.554 280.09 75.420
## 
## Step:  AIC=63.84
## mpg ~ hp + wt
## 
##        Df Sum of Sq    RSS    AIC
## + cyl   1    18.427 176.62 62.665
## <none>              195.05 63.840
## + drat  1    11.366 183.68 63.919
## + gear  1     9.510 185.54 64.241
## + qsec  1     8.988 186.06 64.331
## + carb  1     0.300 194.75 65.791
## + disp  1     0.057 194.99 65.831
## - hp    1    83.274 278.32 73.217
## - wt    1   252.627 447.67 88.427
## 
## Step:  AIC=62.66
## mpg ~ hp + wt + cyl
## 
##        Df Sum of Sq    RSS    AIC
## <none>              176.62 62.665
## - hp    1    14.551 191.17 63.198
## + disp  1     6.176 170.44 63.526
## - cyl   1    18.427 195.05 63.840
## + carb  1     2.519 174.10 64.205
## + drat  1     2.245 174.38 64.255
## + qsec  1     1.401 175.22 64.410
## + gear  1     0.856 175.76 64.509
## - wt    1   115.354 291.98 76.750
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear + carb
## 
## Final Model:
## mpg ~ hp + wt + cyl
## 
## 
##     Step Df   Deviance Resid. Df Resid. Dev      AIC
## 1                             23   158.1237 69.12453
## 2 - carb  1  0.6725191        24   158.7962 67.26034
## 3  - cyl  1  2.0553287        25   160.8515 65.67186
## 4 - drat  1  4.9362500        26   165.7878 64.63912
## 5 - disp  1  8.6924752        27   174.4802 64.27442
## 6 - qsec  1 11.0574885        28   185.5377 64.24071
## 7 - gear  1  9.5100407        29   195.0478 63.84027
## 8  + cyl  1 18.4272345        28   176.6205 62.66456

lm18 - Fit hp, wt, and cyl

lm18 <- lm(mpg ~ hp + wt + cyl, data=mtcars)
summary(lm18)
## 
## Call:
## lm(formula = mpg ~ hp + wt + cyl, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9290 -1.5598 -0.5311  1.1850  5.8986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.75179    1.78686  21.687  < 2e-16 ***
## hp          -0.01804    0.01188  -1.519 0.140015    
## wt          -3.16697    0.74058  -4.276 0.000199 ***
## cyl         -0.94162    0.55092  -1.709 0.098480 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.512 on 28 degrees of freedom
## Multiple R-squared:  0.8431, Adjusted R-squared:  0.8263 
## F-statistic: 50.17 on 3 and 28 DF,  p-value: 2.184e-11
pred_r_squared(lm18)
## [1] 0.7956775
library(car)
vif(lm18)
##       hp       wt      cyl 
## 3.258481 2.580486 4.757456
plot(lm18)

Some models with multiple factor variables, followed by models that treat cyl, gear, and carb as continuous variables

summary(lm(mpg~wt+factor(am)+factor(vs),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7733 -2.2519 -0.3445  1.4129  5.6594 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  30.0787     3.7480   8.025 9.71e-09 ***
## wt           -3.7845     0.8981  -4.214 0.000236 ***
## factor(am)1   1.4913     1.4863   1.003 0.324262    
## factor(vs)1   3.6150     1.2761   2.833 0.008454 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.779 on 28 degrees of freedom
## Multiple R-squared:  0.8079, Adjusted R-squared:  0.7873 
## F-statistic: 39.25 on 3 and 28 DF,  p-value: 3.659e-10
summary(lm(mpg~wt+factor(am)+factor(cyl),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(cyl), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4898 -1.3116 -0.5039  1.4162  5.7758 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.7536     2.8135  11.997  2.5e-12 ***
## wt            -3.1496     0.9080  -3.469  0.00177 ** 
## factor(am)1    0.1501     1.3002   0.115  0.90895    
## factor(cyl)6  -4.2573     1.4112  -3.017  0.00551 ** 
## factor(cyl)8  -6.0791     1.6837  -3.611  0.00123 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared:  0.8375, Adjusted R-squared:  0.8134 
## F-statistic: 34.79 on 4 and 27 DF,  p-value: 2.73e-10
summary(lm(mpg~wt+factor(am)+factor(gear),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(gear), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5798 -2.4056 -0.3692  1.8198  5.7713 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    35.0955     3.1862  11.015 1.72e-11 ***
## wt             -4.8782     0.7945  -6.140 1.46e-06 ***
## factor(am)1     0.1883     1.9942   0.094    0.925    
## factor(gear)4   2.0769     1.7343   1.198    0.242    
## factor(gear)5  -1.0615     2.3845  -0.445    0.660    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.968 on 27 degrees of freedom
## Multiple R-squared:  0.7888, Adjusted R-squared:  0.7575 
## F-statistic:  25.2 on 4 and 27 DF,  p-value: 8.931e-09
summary(lm(mpg~wt+factor(am)+factor(carb),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(carb), data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.301 -1.906  0.000  1.381  5.179 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     32.688      3.669   8.910 4.45e-09 ***
## wt              -3.523      1.058  -3.329   0.0028 ** 
## factor(am)1      2.498      1.827   1.367   0.1843    
## factor(carb)2   -1.201      1.495  -0.804   0.4295    
## factor(carb)3   -2.789      2.271  -1.228   0.2312    
## factor(carb)4   -3.917      1.875  -2.089   0.0475 *  
## factor(carb)6   -5.727      3.354  -1.707   0.1007    
## factor(carb)8   -7.609      3.670  -2.073   0.0491 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.987 on 24 degrees of freedom
## Multiple R-squared:  0.8098, Adjusted R-squared:  0.7543 
## F-statistic:  14.6 on 7 and 24 DF,  p-value: 2.913e-07
summary(lm(mpg~wt+cyl+factor(am),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + cyl + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1735 -1.5340 -0.5386  1.5864  6.0812 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.4179     2.6415  14.923 7.42e-15 ***
## wt           -3.1251     0.9109  -3.431  0.00189 ** 
## cyl          -1.5102     0.4223  -3.576  0.00129 ** 
## factor(am)1   0.1765     1.3045   0.135  0.89334    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.612 on 28 degrees of freedom
## Multiple R-squared:  0.8303, Adjusted R-squared:  0.8122 
## F-statistic: 45.68 on 3 and 28 DF,  p-value: 6.51e-11
summary(lm(mpg~wt+hp+factor(am),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + hp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4221 -1.7924 -0.3788  1.2249  5.5317 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.002875   2.642659  12.867 2.82e-13 ***
## wt          -2.878575   0.904971  -3.181 0.003574 ** 
## hp          -0.037479   0.009605  -3.902 0.000546 ***
## factor(am)1  2.083710   1.376420   1.514 0.141268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared:  0.8399, Adjusted R-squared:  0.8227 
## F-statistic: 48.96 on 3 and 28 DF,  p-value: 2.908e-11
summary(lm(mpg~wt+gear+factor(am),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + gear + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1663 -2.4342 -0.2539  1.5132  6.6583 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.2114     5.2849   7.420 4.43e-08 ***
## wt           -5.3798     0.8017  -6.710 2.77e-07 ***
## gear         -0.5570     1.2619  -0.441    0.662    
## factor(am)1   0.5938     2.1009   0.283    0.780    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.142 on 28 degrees of freedom
## Multiple R-squared:  0.7545, Adjusted R-squared:  0.7282 
## F-statistic: 28.69 on 3 and 28 DF,  p-value: 1.097e-08
summary(lm(mpg~wt+carb+factor(am),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + carb + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5856 -2.1105  0.1393  1.5248  5.1851 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  34.0163     2.9713  11.448 4.49e-12 ***
## wt           -3.6340     0.9281  -3.915 0.000527 ***
## carb         -1.1593     0.4063  -2.853 0.008046 ** 
## factor(am)1   2.5263     1.6479   1.533 0.136490    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.775 on 28 degrees of freedom
## Multiple R-squared:  0.8085, Adjusted R-squared:  0.788 
## F-statistic: 39.41 on 3 and 28 DF,  p-value: 3.5e-10